{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "DDPG(Deep Deterministic Policy Gradient)算法:时序差分和策略迭代的结合体\n",
    "\n",
    "action模型根据state计算动作,value模型评估该动作的价值,有点像GAN的思路\n",
    "\n",
    "使用delay模型(目标网络),类似DQN双模型的思路,缓解自举带来的训练不稳定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEXCAYAAACUBEAgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAZs0lEQVR4nO3df1BU973/8dcusAsIZxGV3VAhMlcT5euPJqh4mpmmrRsxpWms9Dup47XU+k1GszoqHafSKpk6ncFrZmpjq5iZTNV/DBlyi6lEk/JFg8m4oqI0iEqTO7Zw1V2Mhl2gsgvs+/6RcG42EuMi7IdNXo+ZMxPO+Sz7PsZ9znIOoElEBEREUWZWPQARfT0xPkSkBONDREowPkSkBONDREowPkSkBONDREowPkSkBONDREowPkSkhLL47N69G1OmTEFiYiLy8/Nx+vRpVaMQkQJK4vPaa6+hpKQEL7zwAs6dO4c5c+agoKAAHR0dKsYhIgVMKn6wND8/H/PmzcMf//hHAEAoFEJWVhbWrVuHzZs3f+njQ6EQrl27htTUVJhMptEel4jukYigq6sLmZmZMJvv/t4mPkozGYLBIBobG1FaWmrsM5vNcDqdcLvdQz4mEAggEAgYH1+9ehW5ubmjPisRDU97ezsmT5581zVRj89HH32EgYEB2O32sP12ux2XL18e8jHl5eX4zW9+c8f+9vZ2aJo2KnMSUeT8fj+ysrKQmpr6pWujHp/hKC0tRUlJifHx4Alqmsb4EI1B93I5JOrxmThxIuLi4uD1esP2e71eOByOIR9jtVphtVqjMR4RRUnU73ZZLBbk5eWhrq7O2BcKhVBXVwdd16M9DhEpouTLrpKSEhQXF2Pu3LmYP38+fv/736OnpwcrV65UMQ4RKaAkPs888wxu3LiBsrIyeDwefPOb38Rbb711x0VoIvrqUvJ9PvfL7/fDZrPB5/PxgjPRGBLJa5M/20VESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESkQcnxMnTuCpp55CZmYmTCYTDh06FHZcRFBWVoYHHngASUlJcDqd+OCDD8LW3Lp1C8uXL4emaUhLS8OqVavQ3d19XydCRLEl4vj09PRgzpw52L1795DHd+zYgV27dmHv3r1oaGjAuHHjUFBQgN7eXmPN8uXL0dLSgtraWtTU1ODEiRN47rnnhn8WRBR75D4AkOrqauPjUCgkDodDXnzxRWNfZ2enWK1WefXVV0VE5OLFiwJAzpw5Y6w5evSomEwmuXr16j09r8/nEwDi8/nuZ3wiGmGRvDZH9JrPlStX4PF44HQ6jX02mw35+flwu90AALfbjbS0NMydO9dY43Q6YTab0dDQMOTnDQQC8Pv9YRsRxbYRjY/H4wEA2O32sP12u9045vF4kJGREXY8Pj4e6enpxprPKy8vh81mM7asrKyRHJuIFIiJu12lpaXw+XzG1t7ernokIrpPIxofh8MBAPB6vWH7vV6vcczhcKCjoyPseH9/P27dumWs+Tyr1QpN08I2IoptIxqfnJwcOBwO1NXVGfv8fj8aGhqg6zoAQNd1dHZ2orGx0Vhz7NgxhEIh5Ofnj+Q4RDSGxUf6gO7ubnz44YfGx1euXEFTUxPS09ORnZ2NDRs24Le//S2mTZuGnJwcbN26FZmZmViyZAkAYMaMGVi8eDGeffZZ7N27F319fVi7di1+8pOfIDMzc8ROjIjGuEhvpR0/flwA3LEVFxeLyCe327du3Sp2u12sVqssXLhQWltbwz7HzZs3ZdmyZZKSkiKapsnKlSulq6vrnmfgrXaisSmS
16ZJRERh+4bF7/fDZrPB5/Px+g/RGBLJazMm7nYR0VcP40NESjA+RKRExHe7iEaCiED6+xG4fh39fj9MCQmw2u2It9lgMplUj0dRwPhQ1IkI/vXhh7j+2mvovnQJA93dRnwmLV6MCU88gbjERNVj0ihjfCiqRATdLS34x0svIfiZ74SXYBC97e347337EOjoQOa//zvirFaFk9Jo4zUfiqq+jz9G+8svh4Xns6S/HzeOHMGtd95BDH4XCEWA8aGo+vjdd3G7re2ua6SvDzfefBMDPT1RmopUYHwoakL9/ehpbQXu4R1NoKMDoWAwClORKowPRU2/zwffuXOqx6AxgvGh6OJ1HPoU40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKRERPEpLy/HvHnzkJqaioyMDCxZsgStra1ha3p7e+FyuTBhwgSkpKSgqKgIXq83bE1bWxsKCwuRnJyMjIwMbNq0Cf39/fd/NkQUMyKKT319PVwuF06dOoXa2lr09fVh0aJF6OnpMdZs3LgRhw8fRlVVFerr63Ht2jUsXbrUOD4wMIDCwkIEg0GcPHkSBw4cwP79+1FWVjZyZ0VEY55JZPj/fu2NGzeQkZGB+vp6fPvb34bP58OkSZNw8OBB/PjHPwYAXL58GTNmzIDb7caCBQtw9OhR/OAHP8C1a9dgt9sBAHv37sUvf/lL3LhxAxaL5Uuf1+/3w2azwefzQdO04Y5PURa8eRMtzz+P0O3bX7rWnJyM/7NnDyzp6VGYjEZKJK/N+7rm4/P5AADpn/4FaWxsRF9fH5xOp7Fm+vTpyM7OhtvtBgC43W7MmjXLCA8AFBQUwO/3o6WlZcjnCQQC8Pv9YRsRxbZhxycUCmHDhg147LHHMHPmTACAx+OBxWJBWlpa2Fq73Q6Px2Os+Wx4Bo8PHhtKeXk5bDabsWVlZQ13bCIaI4YdH5fLhQsXLqCysnIk5xlSaWkpfD6fsbW3t4/6cxLR6IofzoPWrl2LmpoanDhxApMnTzb2OxwOBINBdHZ2hr378Xq9cDgcxprTp0+Hfb7Bu2GDaz7ParXCarUOZ1QiGqMieucjIli7di2qq6tx7Ngx5OTkhB3Py8tDQkIC6urqjH2tra1oa2uDrusAAF3X0dzcjI6ODmNNbW0tNE1Dbm7u/ZwLEcWQiN75uFwuHDx4EG+88QZSU1ONazQ2mw1JSUmw2WxYtWoVSkpKkJ6eDk3TsG7dOui6jgULFgAAFi1ahNzcXKxYsQI7duyAx+PBli1b4HK5+O6G6GskovhUVFQAAL7zne+E7d+3bx9+9rOfAQB27twJs9mMoqIiBAIBFBQUYM+ePcbauLg41NTUYM2aNdB1HePGjUNxcTG2bdt2f2dCRDElovjcy7cEJSYmYvfu3di9e/cXrnnwwQdx5MiRSJ6aiL5i+LNdRKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESjA+RKQE40NESgzrl4kRjQQRQWcwiP/q6oLNYsG/pabCbDKpHouihPEhJUQEbT092Hr+PFp9PoyLj8f/e+ghPJOTgzgG6GuBX3aREgLgP5qbcbGzEwMi8Pf14Y+XLuHCxx+rHo2ihPGhqIlLSkLSlCnGx/6+vrDjwVAIgYGBKE9FqjA+FDVxyclImT4dAGAC8F2HA/Gf+RLrIU3Dgykpn6y1WmEy86/nVxmv+VBUTfjud/FRbS0GurtRPHUqUhMS8P+vX8cDSUl49qGHkJGYCAAY//jjiOe/RvuVxvhQVCVmZ+OBZctw9cABxAeD+L9TpuDHU6Zg8P2PyWRCSm4u7EuW8J3PVxzjQ1FlMpsxafFiAID3P/8TfR9/DNOnvxvclJAA7ZFHMHnVKv4b7V8DjA9F
nTkhARmFhbA9+ij8588j4PHAnJSElBkzkDJjBuKSklSPSFHA+JASJrMZid/4BhK/8Q3Vo5Ai/KKaiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhIiYjiU1FRgdmzZ0PTNGiaBl3XcfToUeN4b28vXC4XJkyYgJSUFBQVFcHr9YZ9jra2NhQWFiI5ORkZGRnYtGkT+vv7R+ZsiChmRBSfyZMnY/v27WhsbMTZs2fxve99D08//TRaWloAABs3bsThw4dRVVWF+vp6XLt2DUuXLjUePzAwgMLCQgSDQZw8eRIHDhzA/v37UVZWNrJnRURjn9yn8ePHyyuvvCKdnZ2SkJAgVVVVxrFLly4JAHG73SIicuTIETGbzeLxeIw1FRUVommaBAKBL3yO3t5e8fl8xtbe3i4AxOfz3e/4RDSCfD7fPb82h33NZ2BgAJWVlejp6YGu62hsbERfXx+cTqexZvr06cjOzobb7QYAuN1uzJo1C3a73VhTUFAAv99vvHsaSnl5OWw2m7FlZWUNd2wiGiMijk9zczNSUlJgtVqxevVqVFdXIzc3Fx6PBxaLBWlpaWHr7XY7PB4PAMDj8YSFZ/D44LEvUlpaCp/PZ2zt7e2Rjk1EY0zEv8P54YcfRlNTE3w+H15//XUUFxejvr5+NGYzWK1WWK3WUX0OIoquiONjsVgwdepUAEBeXh7OnDmDl156Cc888wyCwSA6OzvD3v14vV44HA4AgMPhwOnTp8M+3+DdsME1RPT1cN/f5xMKhRAIBJCXl4eEhATU1dUZx1pbW9HW1gZd1wEAuq6jubkZHR0dxpra2lpomobc3Nz7HYWIYkhE73xKS0vx5JNPIjs7G11dXTh48CDeeecdvP3227DZbFi1ahVKSkqQnp4OTdOwbt066LqOBQsWAAAWLVqE3NxcrFixAjt27IDH48GWLVvgcrn4ZRXR10xE8eno6MBPf/pTXL9+HTabDbNnz8bbb7+NJ554AgCwc+dOmM1mFBUVIRAIoKCgAHv27DEeHxcXh5qaGqxZswa6rmPcuHEoLi7Gtm3bRvasiGjMM4l8+m/VxhC/3w+bzQafzwdN01SPQ0SfiuS1yZ/tIiIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIlGB8iUoLxISIl7is+27dvh8lkwoYNG4x9vb29cLlcmDBhAlJSUlBUVASv1xv2uLa2NhQWFiI5ORkZGRnYtGkT+vv772cUIooxw47PmTNn8PLLL2P27Nlh+zdu3IjDhw+jqqoK9fX1uHbtGpYuXWocHxgYQGFhIYLBIE6ePIkDBw5g//79KCsrG/5ZEFHskWHo6uqSadOmSW1trTz++OOyfv16ERHp7OyUhIQEqaqqMtZeunRJAIjb7RYRkSNHjojZbBaPx2OsqaioEE3TJBAIDPl8vb294vP5jK29vV0AiM/nG874RDRKfD7fPb82h/XOx+VyobCwEE6nM2x/Y2Mj+vr6wvZPnz4d2dnZcLvdAAC3241Zs2bBbrcbawoKCuD3+9HS0jLk85WXl8NmsxlbVlbWcMYmojEk4vhUVlbi3LlzKC8vv+OYx+OBxWJBWlpa2H673Q6Px2Os+Wx4Bo8PHhtKaWkpfD6fsbW3t0c6NhGNMfGRLG5vb8f69etRW1uLxMTE0ZrpDlarFVarNWrPR0SjL6J3Po2Njejo6MCjjz6K+Ph4xMfHo76+Hrt27UJ8fDzsdjuCwSA6OzvDHuf1euFwOAAADofjjrtfgx8PriGir76I4rNw4UI0NzejqanJ2ObOnYvly5cb/52Q
kIC6ujrjMa2trWhra4Ou6wAAXdfR3NyMjo4OY01tbS00TUNubu4InRYRjXURfdmVmpqKmTNnhu0bN24cJkyYYOxftWoVSkpKkJ6eDk3TsG7dOui6jgULFgAAFi1ahNzcXKxYsQI7duyAx+PBli1b4HK5+KUV0ddIRPG5Fzt37oTZbEZRURECgQAKCgqwZ88e43hcXBxqamqwZs0a6LqOcePGobi4GNu2bRvpUYhoDDOJiKgeIlJ+vx82mw0+nw+apqkeh4g+Fclrkz/bRURKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKMD5EpATjQ0RKxKseYDhEBADg9/sVT0JEnzX4mhx8jd5NTMbn5s2bAICsrCzFkxDRULq6umCz2e66Jibjk56eDgBoa2v70hMca/x+P7KystDe3g5N01SPc884d3TF6twigq6uLmRmZn7p2piMj9n8yaUqm80WU/9jPkvTtJicnXNHVyzOfa9vCHjBmYiUYHyISImYjI/VasULL7wAq9WqepSIxersnDu6YnXuSJjkXu6JERGNsJh850NEsY/xISIlGB8iUoLxISIlGB8iUiIm47N7925MmTIFiYmJyM/Px+nTp5XOc+LECTz11FPIzMyEyWTCoUOHwo6LCMrKyvDAAw8gKSkJTqcTH3zwQdiaW7duYfny5dA0DWlpaVi1ahW6u7tHde7y8nLMmzcPqampyMjIwJIlS9Da2hq2pre3Fy6XCxMmTEBKSgqKiorg9XrD1rS1taGwsBDJycnIyMjApk2b0N/fP2pzV1RUYPbs2cZ3/+q6jqNHj47pmYeyfft2mEwmbNiwIeZmHxESYyorK8Viscif/vQnaWlpkWeffVbS0tLE6/Uqm+nIkSPy61//Wv785z8LAKmurg47vn37drHZbHLo0CH529/+Jj/84Q8lJydHbt++baxZvHixzJkzR06dOiXvvvuuTJ06VZYtWzaqcxcUFMi+ffvkwoUL0tTUJN///vclOztburu7jTWrV6+WrKwsqaurk7Nnz8qCBQvkW9/6lnG8v79fZs6cKU6nU86fPy9HjhyRiRMnSmlp6ajN/Ze//EXefPNN+fvf/y6tra3yq1/9ShISEuTChQtjdubPO336tEyZMkVmz54t69evN/bHwuwjJebiM3/+fHG5XMbHAwMDkpmZKeXl5Qqn+l+fj08oFBKHwyEvvviisa+zs1OsVqu8+uqrIiJy8eJFASBnzpwx1hw9elRMJpNcvXo1arN3dHQIAKmvrzfmTEhIkKqqKmPNpUuXBIC43W4R+SS8ZrNZPB6PsaaiokI0TZNAIBC12cePHy+vvPJKTMzc1dUl06ZNk9raWnn88ceN+MTC7CMppr7sCgaDaGxshNPpNPaZzWY4nU643W6Fk32xK1euwOPxhM1ss9mQn59vzOx2u5GWloa5c+caa5xOJ8xmMxoaGqI2q8/nA/C/vzWgsbERfX19YbNPnz4d2dnZYbPPmjULdrvdWFNQUAC/34+WlpZRn3lgYACVlZXo6emBrusxMbPL5UJhYWHYjEBs/HmPpJj6qfaPPvoIAwMDYX/wAGC323H58mVFU92dx+MBgCFnHjzm8XiQkZERdjw+Ph7p6enGmtEWCoWwYcMGPPbYY5g5c6Yxl8ViQVpa2l1nH+rcBo+NlubmZui6jt7eXqSkpKC6uhq5ubloamoaszMDQGVlJc6dO4czZ87ccWws/3mPhpiKD40el8uFCxcu4L333lM9yj15+OGH0dTUBJ/Ph9dffx3FxcWor69XPdZdtbe3Y/369aitrUViYqLqcZSLqS+7Jk6ciLi4uDuu/nu9
XjgcDkVT3d3gXHeb2eFwoKOjI+x4f38/bt26FZXzWrt2LWpqanD8+HFMnjzZ2O9wOBAMBtHZ2XnX2Yc6t8Fjo8VisWDq1KnIy8tDeXk55syZg5deemlMz9zY2IiOjg48+uijiI+PR3x8POrr67Fr1y7Ex8fDbreP2dlHQ0zFx2KxIC8vD3V1dca+UCiEuro66LqucLIvlpOTA4fDETaz3+9HQ0ODMbOu6+js7ERjY6Ox5tixYwiFQsjPzx+12UQEa9euRXV1NY4dO4acnJyw43l5eUhISAibvbW1FW1tbWGzNzc3h8WztrYWmqYhNzd31Gb/vFAohEAgMKZnXrhwIZqbm9HU1GRsc+fOxfLly43/HquzjwrVV7wjVVlZKVarVfbv3y8XL16U5557TtLS0sKu/kdbV1eXnD9/Xs6fPy8A5He/+52cP39e/vnPf4rIJ7fa09LS5I033pD3339fnn766SFvtT/yyCPS0NAg7733nkybNm3Ub7WvWbNGbDabvPPOO3L9+nVj+9e//mWsWb16tWRnZ8uxY8fk7Nmzouu66LpuHB+89bto0SJpamqSt956SyZNmjSqt343b94s9fX1cuXKFXn//fdl8+bNYjKZ5K9//euYnfmLfPZuV6zNfr9iLj4iIn/4wx8kOztbLBaLzJ8/X06dOqV0nuPHjwuAO7bi4mIR+eR2+9atW8Vut4vVapWFCxdKa2tr2Oe4efOmLFu2TFJSUkTTNFm5cqV0dXWN6txDzQxA9u3bZ6y5ffu2PP/88zJ+/HhJTk6WH/3oR3L9+vWwz/OPf/xDnnzySUlKSpKJEyfKL37xC+nr6xu1uX/+85/Lgw8+KBaLRSZNmiQLFy40wjNWZ/4in49PLM1+v/j7fIhIiZi65kNEXx2MDxEpwfgQkRKMDxEpwfgQkRKMDxEpwfgQkRKMDxEpwfgQkRKMDxEpwfgQkRL/A9wZem2EbyznAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import gym\n",
    "\n",
    "\n",
    "#Environment definition\n",
    "class MyWrapper(gym.Wrapper):\n",
    "    \"\"\"Pendulum-v1 wrapper with a simplified reset/step interface.\n",
    "\n",
    "    reset() returns only the observation and step() returns\n",
    "    (state, reward, over). Actions are multiplied by 2 before being sent\n",
    "    to the environment and rewards are shifted with (reward + 8) / 8.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        pendulum = gym.make('Pendulum-v1', render_mode='rgb_array')\n",
    "        super().__init__(pendulum)\n",
    "        self.env = pendulum\n",
    "        #Number of steps taken in the current episode\n",
    "        self.step_n = 0\n",
    "\n",
    "    def reset(self):\n",
    "        \"\"\"Start a new episode and return the initial observation only.\"\"\"\n",
    "        observation, _ = self.env.reset()\n",
    "        self.step_n = 0\n",
    "        return observation\n",
    "\n",
    "    def step(self, action):\n",
    "        \"\"\"Apply one action and return (state, reward, over).\"\"\"\n",
    "        #The caller's action is multiplied by 2 before reaching the env\n",
    "        torque = [action * 2]\n",
    "        observation, raw_reward, terminated, truncated, _ = self.env.step(\n",
    "            torque)\n",
    "\n",
    "        #Cap the episode length at 200 steps\n",
    "        self.step_n += 1\n",
    "        over = True if self.step_n >= 200 else (terminated or truncated)\n",
    "\n",
    "        #Shift the reward to make training easier\n",
    "        return observation, (raw_reward + 8) / 8, over\n",
    "\n",
    "    #Render the current frame\n",
    "    def show(self):\n",
    "        \"\"\"Draw the currently rendered frame in a 3x3 figure.\"\"\"\n",
    "        import matplotlib.pyplot as plt\n",
    "\n",
    "        frame = self.env.render()\n",
    "        plt.figure(figsize=(3, 3))\n",
    "        plt.imshow(frame)\n",
    "        plt.show()\n",
    "\n",
    "\n",
    "#Create the wrapped Pendulum environment used by the rest of the notebook\n",
    "env = MyWrapper()\n",
    "\n",
    "env.reset()\n",
    "\n",
    "#Render the initial frame as a quick visual check\n",
    "env.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.0211],\n",
       "        [-0.0411]], grad_fn=<TanhBackward0>)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "\n",
    "class Model(torch.nn.Module):\n",
    "\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.s = torch.nn.Sequential(\n",
    "            torch.nn.Linear(3, 64),\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.Linear(64, 64),\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.Linear(64, 1),\n",
    "            torch.nn.Tanh(),\n",
    "        )\n",
    "\n",
    "    def forward(self, state):\n",
    "        return self.s(state)\n",
    "\n",
    "\n",
    "#Online actor and its delayed copy, initialised with identical weights\n",
    "model_action = Model()\n",
    "model_action_delay = Model()\n",
    "model_action_delay.load_state_dict(model_action.state_dict())\n",
    "\n",
    "#Sanity check: forward a random batch of 2 states\n",
    "model_action(torch.randn(2, 3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.1421],\n",
       "        [-0.0835]], grad_fn=<AddmmBackward0>)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_value = torch.nn.Sequential(\n",
    "    torch.nn.Linear(4, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 1),\n",
    ")\n",
    "\n",
    "model_value_delay = torch.nn.Sequential(\n",
    "    torch.nn.Linear(4, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 64),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(64, 1),\n",
    ")\n",
    "\n",
    "model_value_delay.load_state_dict(model_value.state_dict())\n",
    "\n",
    "model_value(torch.randn(2, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\appDir\\python3.10\\lib\\site-packages\\gym\\utils\\passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`.  (Deprecated NumPy 1.24)\n",
      "  if not isinstance(terminated, (bool, np.bool8)):\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "24.51980899570239"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from IPython import display\n",
    "import random\n",
    "\n",
    "\n",
    "#Play one episode and record the transitions\n",
    "def play(show=False):\n",
    "    \"\"\"Run one episode with the current actor plus exploration noise.\n",
    "\n",
    "    Args:\n",
    "        show: when True, re-render the environment after every step.\n",
    "\n",
    "    Returns:\n",
    "        (data, reward_sum): the list of\n",
    "        (state, action, reward, next_state, over) tuples collected during\n",
    "        the episode and the sum of the rewards returned by env.step.\n",
    "    \"\"\"\n",
    "    data = []\n",
    "    reward_sum = 0\n",
    "\n",
    "    state = env.reset()\n",
    "    over = False\n",
    "    while not over:\n",
    "        #Acting is pure inference, so skip building an autograd graph\n",
    "        with torch.no_grad():\n",
    "            action = model_action(\n",
    "                torch.FloatTensor(state).reshape(1, 3)).item()\n",
    "\n",
    "        #Add Gaussian noise to the action for exploration\n",
    "        action += random.normalvariate(mu=0, sigma=0.2)\n",
    "\n",
    "        #Clamp to the actor's tanh range [-1, 1]. env.step multiplies the\n",
    "        #action by 2 and Pendulum-v1 clips the torque to [-2, 2] (per the\n",
    "        #Gym docs), so the stored action now matches the applied torque\n",
    "        action = max(-1.0, min(1.0, action))\n",
    "\n",
    "        next_state, reward, over = env.step(action)\n",
    "\n",
    "        data.append((state, action, reward, next_state, over))\n",
    "        reward_sum += reward\n",
    "\n",
    "        state = next_state\n",
    "\n",
    "        if show:\n",
    "            display.clear_output(wait=True)\n",
    "            env.show()\n",
    "\n",
    "    return data, reward_sum\n",
    "\n",
    "\n",
    "#Play one episode without rendering; the cell shows its total reward\n",
    "play()[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_6560\\1738991660.py:27: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ..\\torch\\csrc\\utils\\tensor_new.cpp:248.)\n",
      "  state = torch.FloatTensor([i[0] for i in data]).reshape(-1, 3)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(torch.Size([64, 3]),\n",
       " 200,\n",
       " (array([0.80205846, 0.5972456 , 0.04965575], dtype=float32),\n",
       "  0.2752124316053419,\n",
       "  0.9487213086994294,\n",
       "  array([0.78439873, 0.6202569 , 0.58015364], dtype=float32),\n",
       "  False))"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#数据池\n",
    "class Pool:\n",
    "\n",
    "    def __init__(self):\n",
    "        self.pool = []\n",
    "\n",
    "    def __len__(self):\n",
    "        return len(self.pool)\n",
    "\n",
    "    def __getitem__(self, i):\n",
    "        return self.pool[i]\n",
    "\n",
    "    #更新动作池\n",
    "    def update(self):\n",
    "        #每次更新不少于N条新数据\n",
    "        old_len = len(self.pool)\n",
    "        while len(pool) - old_len < 200:\n",
    "            self.pool.extend(play()[0])\n",
    "\n",
    "        #只保留最新的N条数据\n",
    "        self.pool = self.pool[-2_0000:]\n",
    "\n",
    "    #获取一批数据样本\n",
    "    def sample(self):\n",
    "        data = random.sample(self.pool, 64)\n",
    "\n",
    "        state = torch.FloatTensor([i[0] for i in data]).reshape(-1, 3)\n",
    "        action = torch.FloatTensor([i[1] for i in data]).reshape(-1, 1)\n",
    "        reward = torch.FloatTensor([i[2] for i in data]).reshape(-1, 1)\n",
    "        next_state = torch.FloatTensor([i[3] for i in data]).reshape(-1, 3)\n",
    "        over = torch.LongTensor([i[4] for i in data]).reshape(-1, 1)\n",
    "\n",
    "        return state, action, reward, next_state, over\n",
    "\n",
    "\n",
    "#Fill the pool with one update and draw a sample batch\n",
    "pool = Pool()\n",
    "pool.update()\n",
    "state, action, reward, next_state, over = pool.sample()\n",
    "\n",
    "next_state.shape, len(pool), pool[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Separate Adam optimizers; the critic uses a 10x larger learning rate\n",
    "optimizer_action = torch.optim.Adam(model_action.parameters(), lr=5e-4)\n",
    "optimizer_value = torch.optim.Adam(model_value.parameters(), lr=5e-3)\n",
    "\n",
    "\n",
    "def soft_update(_from, _to):\n",
    "    for _from, _to in zip(_from.parameters(), _to.parameters()):\n",
    "        value = _to.data * 0.7 + _from.data * 0.3\n",
    "        _to.data.copy_(value)\n",
    "\n",
    "\n",
    "def requires_grad(model, value):\n",
    "    for param in model.parameters():\n",
    "        param.requires_grad_(value)\n",
    "\n",
    "\n",
    "#The delayed networks are not attached to any optimizer;\n",
    "#turn off gradient tracking for their parameters\n",
    "requires_grad(model_action_delay, False)\n",
    "requires_grad(model_value_delay, False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4627004563808441"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def train_action(state):\n",
    "    \"\"\"Run one actor update on a batch of states and return the loss.\"\"\"\n",
    "    #Only the actor is trainable during this step; the critic is frozen\n",
    "    requires_grad(model_action, True)\n",
    "    requires_grad(model_value, False)\n",
    "\n",
    "    #Score the actor's own actions with the critic; a higher value is\n",
    "    #better, so the loss is the negated mean value\n",
    "    proposed_action = model_action(state)\n",
    "    state_action = torch.cat([state, proposed_action], dim=1)\n",
    "    actor_loss = -model_value(state_action).mean()\n",
    "\n",
    "    actor_loss.backward()\n",
    "    optimizer_action.step()\n",
    "    optimizer_action.zero_grad()\n",
    "\n",
    "    return actor_loss.item()\n",
    "\n",
    "\n",
    "#Smoke-test one actor update on the batch sampled earlier\n",
    "train_action(state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.35553503036499023"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def train_value(state, action, reward, next_state, over):\n",
    "    \"\"\"Run one TD update of the critic on a batch and return the loss.\"\"\"\n",
    "    #Only the critic is trainable during this step\n",
    "    requires_grad(model_action, False)\n",
    "    requires_grad(model_value, True)\n",
    "\n",
    "    #Current estimate for the sampled (state, action) pairs\n",
    "    value = model_value(torch.cat([state, action], dim=1))\n",
    "\n",
    "    #Bootstrapped estimate from the delayed actor and critic\n",
    "    with torch.no_grad():\n",
    "        next_action = model_action_delay(next_state)\n",
    "        next_value = model_value_delay(\n",
    "            torch.cat([next_state, next_action], dim=1))\n",
    "    #Discount by 0.99 and drop the bootstrap term where over == 1\n",
    "    target = next_value * 0.99 * (1 - over) + reward\n",
    "\n",
    "    #TD loss and parameter update\n",
    "    td_loss = torch.nn.functional.mse_loss(value, target)\n",
    "    td_loss.backward()\n",
    "    optimizer_value.step()\n",
    "    optimizer_value.zero_grad()\n",
    "\n",
    "    return td_loss.item()\n",
    "\n",
    "\n",
    "#Smoke-test one critic update on the batch sampled earlier\n",
    "train_value(state, action, reward, next_state, over)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 400 20.26322107432315\n"
     ]
    }
   ],
   "source": [
    "#Training loop\n",
    "def train(epochs=10, steps_per_epoch=20, log_every=20, eval_episodes=20):\n",
    "    \"\"\"Alternate data collection with actor/critic updates.\n",
    "\n",
    "    Args:\n",
    "        epochs: number of collect-then-train rounds.\n",
    "        steps_per_epoch: gradient steps (one batch each) per round.\n",
    "        log_every: evaluate and print when epoch % log_every == 0.\n",
    "        eval_episodes: episodes averaged for each printed result.\n",
    "\n",
    "    With the defaults only epoch 0 is evaluated, because log_every (20)\n",
    "    is larger than epochs (10). Evaluation calls play(), so the printed\n",
    "    average comes from episodes that include exploration noise.\n",
    "    \"\"\"\n",
    "    model_action.train()\n",
    "    model_value.train()\n",
    "\n",
    "    for epoch in range(epochs):\n",
    "        #Collect fresh transitions with the current actor\n",
    "        pool.update()\n",
    "\n",
    "        for _ in range(steps_per_epoch):\n",
    "            state, action, reward, next_state, over = pool.sample()\n",
    "\n",
    "            train_action(state)\n",
    "            train_value(state, action, reward, next_state, over)\n",
    "\n",
    "        #Move the delayed networks towards the online ones once per epoch\n",
    "        soft_update(model_action, model_action_delay)\n",
    "        soft_update(model_value, model_value_delay)\n",
    "\n",
    "        if epoch % log_every == 0:\n",
    "            rewards = [play()[-1] for _ in range(eval_episodes)]\n",
    "            test_result = sum(rewards) / eval_episodes\n",
    "            print(epoch, len(pool), test_result)\n",
    "\n",
    "\n",
    "#Run the training loop\n",
    "train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEXCAYAAACUBEAgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAjC0lEQVR4nO3dfVSUZf4/8PcMzAyPMwjIjBiIm5qR4iYWTu3324Os5FKbRd+THlc95tpPw45mx1a2srN7vi1ubrU9Wu2erM4eZY+esDQfllAx10kUZUNS0jIhYUAhZgBlgJnP7w9lvk0+zSBwMfZ+nXOfE/d13TOfEebdfV/3fV+3RkQERET9TKu6ACL6aWL4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREsrC54033kBKSgrCwsKQkZGB0tJSVaUQkQJKwuef//wnlixZgueeew4HDhzAuHHjkJWVhYaGBhXlEJECGhU3lmZkZOCWW27B66+/DgDweDxISkrC448/jmXLll1xe4/Hg9raWkRHR0Oj0fR1uUTkJxFBS0sLEhMTodVeft8mtJ9q8uro6EBZWRny8vK867RaLTIzM2Gz2S66jcvlgsvl8v588uRJpKam9nmtRNQzNTU1uO666y7bp9/D5/Tp03C73TCbzT7rzWYzjhw5ctFt8vPz8Yc//OGC9TU1NTAajX1SJxEFzul0IikpCdHR0Vfs2+/h0xN5eXlYsmSJ9+fuD2g0Ghk+RAOQP8Mh/R4+8fHxCAkJQX19vc/6+vp6WCyWi25jMBhgMBj6ozwi6if9frZLr9cjPT0dxcXF3nUejwfFxcWwWq39XQ4RKaLksGvJkiWYPXs2JkyYgFtvvRV//etf0dbWhjlz5qgoh4gUUBI+Dz/8ME6dOoXly5fDbrfj5z//ObZu3XrBIDQRXbuUXOdztZxOJ0wmExwOBweciQaQQL6bvLeLiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSIuDw2bVrF+677z4kJiZCo9Fgw4YNPu0iguXLl2PIkCEIDw9HZmYmjh496tOnqakJM2bMgNFoRExMDObOnYvW1tar+iBEFFwCDp+2tjaMGzcOb7zxxkXbX3jhBbz66qt46623sHfvXkRGRiIrKwvt7e3ePjNmzEBlZSWKioqwadMm7Nq1C48++mjPPwURBR+5CgCksLDQ+7PH4xGLxSIrV670rmtubhaDwSBr164VEZEvv/xSAMi+ffu8fbZs2SIajUZOnjzp1/s6HA4BIA6H42rKJ6JeFsh3s1fHfI4fPw673Y7MzEzvOpPJhIyMDNhsNgCAzWZDTEwMJkyY4O2TmZkJrVaLvXv3XvR1XS4XnE6nz0JEwa1Xw8dutwMAzGazz3qz2exts9vtSEhI8GkPDQ1FbGyst8+P5efnw2QyeZekpKTeLJuIFAiKs115eXlwOBzepaamRnVJRHSVejV8LBYLAKC+vt5nfX19vbfNYrGgoaHBp72rqwtNTU3ePj9mMBhgNBp9FiIKbr0aPsOHD4fFYkFxcbF3ndPpxN69e2G1WgEAVqsVzc3NKCsr8/bZvn07PB4PMjIyerMcIhrAQgPdoLW1FceOHfP+fPz4cZSXlyM2NhbJyclYvHgx/vd//xcjR47E8OHD8eyzzyIxMRFTp04FANx444245557MG/ePLz11lvo7OzEwoULMW3aNCQmJvbaByOiAS7QU2k7duwQABcss2fPFpFzp9ufffZZMZvNYjAYZNKkSVJVVeXzGo2NjTJ9+nSJiooSo9Eoc+bM
kZaWFr9r4Kl2ooEpkO+mRkREYfb1iNPphMlkgsPh4PgP0QASyHczKM52EdG1h+FDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISImAwic/Px+33HILoqOjkZCQgKlTp6KqqsqnT3t7O3JzcxEXF4eoqCjk5OSgvr7ep091dTWys7MRERGBhIQELF26FF1dXVf/aYgoaAQUPiUlJcjNzcXnn3+OoqIidHZ2YvLkyWhra/P2eeKJJ7Bx40asW7cOJSUlqK2txYMPPuhtd7vdyM7ORkdHB/bs2YP3338f7733HpYvX957n4qIBj65Cg0NDQJASkpKRESkublZdDqdrFu3ztvn8OHDAkBsNpuIiGzevFm0Wq3Y7XZvn1WrVonRaBSXy+XX+zocDgEgDofjasonol4WyHfzqsZ8HA4HACA2NhYAUFZWhs7OTmRmZnr7jB49GsnJybDZbAAAm82GsWPHwmw2e/tkZWXB6XSisrLyou/jcrngdDp9FiIKbj0OH4/Hg8WLF+P222/HmDFjAAB2ux16vR4xMTE+fc1mM+x2u7fPD4Onu7277WLy8/NhMpm8S1JSUk/LJoVE5JIL/fSE9nTD3NxcHDp0CLt37+7Nei4qLy8PS5Ys8f7sdDoZQEGiO1jOnDmDo0ePorKyEocOHcKJEyfQ2NiI9PR0PPfcczAYDIorpf7Wo/BZuHAhNm3ahF27duG6667zrrdYLOjo6EBzc7PP3k99fT0sFou3T2lpqc/rdZ8N6+7zYwaDgX+cQUZE4Ha78fXXX+Ojjz7Cxx9/jOPHjwMAkpKSMGTIEAwZMgSJiYnQannFx09RQOEjInj88cdRWFiInTt3Yvjw4T7t6enp0Ol0KC4uRk5ODgCgqqoK1dXVsFqtAACr1Yrnn38eDQ0NSEhIAAAUFRXBaDQiNTW1Nz4TKeZ2u/HVV1/h7bffxkcffQSDwYDMzEwsXboUaWlpGDRoEMLDw6HT6aDRaKDRaFSXTCoEMpK9YMECMZlMsnPnTqmrq/MuZ86c8faZP3++JCcny/bt22X//v1itVrFarV627u6umTMmDEyefJkKS8vl61bt8rgwYMlLy/P7zp4tmtg8ng84nQ65fXXX5eRI0dKWlqavPbaa1JTUyOdnZ3i8XhUl0h9LJDvZkDhA+Ciy+rVq719zp49K4899pgMGjRIIiIi5IEHHpC6ujqf1/n2229lypQpEh4eLvHx8fLkk09KZ2en33UwfAYej8cj1dXV8pvf/EYSExNl2bJl8t1334nb7VZdGvWjQL6bGpHgO9XgdDphMpngcDhgNBpVl/OTJyI4evQo5s+fj/r6evz5z39GVlYWQkNDeUj1ExPId7PHZ7uIgHPB88033+CRRx6Bx+PB2rVrMXbsWIYOXRHDh3pMRFBfX4/c3Fx0dnbigw8+wKhRoxg85BeGD/VYe3s7nn/+eRw/fhxr1qxh8FBAGD7UIyKCjRs34sMPP8RLL72Em2++mcFDAeHVXdQjdrsdK1euxH333YepU6fyQkEKGP9iKGDdA8sOhwNLliyBXq9XXRIFIYYPBUREcPr0aXzwwQeYNm0arr/+eh5uUY8wfChgn376KZqbmzF9+nSEhISoLoeCFMOHAuJ2u1FYWAir1Yqf/exnqsvxIZyeI6gwfCggtbW1KC8vxz333HPVYz3i8aCrpQUdjY3obG6Gx+W6qvCoq6u7YL5wGrh4qp38JiI4fPgwzpw5g4yMjB6P9YgIOpuacGrLFjhKS9Fx+jS0BgMiRoxAwq9+hei0NGgCPJzzeDwoKCiA0WjE3LlzOQ4VBBg+FJAvvvgCQ4cOvWA2Sn+J
CFy1tfj2lVfQVlUFnN/Tcbe2wtHYiNaKCiTOmoXBWVkBBVBrayvWr1+PuLg4zJw5k/M/BQEedlFAjhw5gpSUFERFRfVoe3dbG6rffhttR45APB5873Jh/+nTOOp0wiMC95kzOPnBB3AcOOD3IZiIYP/+/aisrMT+/fvx9ddfc+wnCDB8KCAnT55EYmIiQkMD32mWri7UrV2LlvJyiAiq29qwqLQUuZ9/jv+3Zw8Kjh+HWwSeM2dQX1gIT3u7X6/r8XhQWFiI1tZWnD59Gtu3bw+4Nup/DB8KiMvlQmxsbI/GVNrtdjTu3Ang3ERQf66owJfNzXCLwNnZidcPH8ah778HAJz55hu4z5694muKCOx2O7Zt2wbgXBB9/PHHcLlcAddH/YvhQwHryVkuEUFTSQncLS3edc7OTp8+HR4PXG53wK+9fft2tLW1wWKxIC4uDkeOHEFVVRUPvQY4DjhTwHqyV+Fua4PjBw8O0AC4y2LBMacTXedDYpTRiGHnx5JCDAZo/LhfrKurC1VVVXjnnXfw9ttvY+jQoRg9ejQOHDiAtLS0gOuk/sPwoYAYDAY0NTVBRPw+9BIROA8cQHtNjXedRqPB7BEjEK3T4dO6OgwJD8e8UaOQEBYGADCOH49QP2ap7H5+XEREBF588UUMGTIEjz32GK/3CQIMHwrI0KFDUVtbi66uLuh0Or+2ka4unNqyBdLV5bM+VKvF/6Sk4KGUFHTHmEajAUJCEGO1+rXno9frERcXB6fTiYaGBlgsFoSGhmLo0KG81meA45gPBWT06NE4ceIEWltb/eovImivqcHZEycu2q7RaKA9//ic7rCIHDkS0eefgnsl3dvZ7Xa0trYiJSWFj+MJEgwfCkhaWhpOnjwZ0GHN6U8/hdvPsAIAU3o6QiIi/O4vIqisrIRWq8WoUaP83o7UYviQ3zQaDVJTUxEZGYnS0lK/zia5W1rgLCvz+z10cXGIveOOgOryeDzYvXs3RowY4X0QJQ18DB8KyJAhQzB+/Hhs3boVHR0dl+0rImguLYXLbvf79Qfddhv0Ad664XA4sGPHDkyaNAlh5wesaeBj+FBAQkJC8MADD8Bms+Gbb765bF9xu9FYXOy9f+tKNDodYs4/VttfIoLPPvsMjY2NmDJlCqdzDSL8TVHA7r77bsTExGDNmjVwX+aiwLPffIMzx475/bpRo0cj8oYbAhosbm9vx7vvvovbbruN4z1BhuFDAdFoNIiLi8OsWbNQUFCAY8eOXXTsR0Rwats2eAK4IDH2rrugCeCeMRFBcXExSktL8dvf/pZ3sgcZhg8FTKvVYtq0aYiJicHLL7980bGfzsZGtFRU+P2aerMZxgAfv3Pq1CmsWLECd999N26//XaeXg8yDB/qEYvFgqeeegobN25EYWEhPB6Pt01E8L3Nho4ATscP+sUvoBs0yO/+HR0deOWVV1BbW4unnnqKA81BiOFDPaLRaHDvvfciJycHy5cvR1lZmffwy93WhsZPP/V7oFkbHo6YW27x64pm4Nw80uvXr8fq1auRl5fHZ8MHKYYP9VhYWBiefvppXH/99ViwYAEOHz4MEcHZ48fR/t13fr9O9NixiBgxwq++Ho8HxcXFePrppzFt2jTMnDmTZ7iCFH9r1GMajQYJCQl47bXXYDAYMGfOHJSXleHUtm2QH02XcUlaLUwTJkBzhfvERARutxubN2/GY489hjvvvBPLly/nIHMQY/jQVdFoNLj++uvx7rvvIiIiAtMffhgbP/wQXR6PX1dAGyyWczeRXuawSURw5swZrFq1CgsWLMCdd96Jv/zlLzCZTDzcCmIMH7pqGo0Go0aNwgfvv4+bR43C8wcOYNWRI2hob4fncgGk0SD2zjsvOXWGiKCrqwsVFRWYN28enn/+ecydOxcvvvhij2dTpIGDU2pQr9BoNEgcPBhLkpMxbNQorP3mG+xpaMDUYcNwp8WCOIMBIT+621xrMGDQRfZ6RAQdHR04duwY/vGP
f6CgoAAJCQl45513kJWVxWfDXyMYPtQrRAStlZUIaWxEzrBhuCU+Hh9++y3WfP01/nn8OG6Nj0fG4MEYaTTCpNcjLCQE8ePHw5CYCI/Hg46ODrS1taGhoQH79u3D1q1bsXv3bhiNRixYsACzZs2C2Wzm3s41hOFDvcPjOTfQ3NEBrUaDYZGRWJSaipyUFJTY7dhlt2Pn+RtMzWFhiA8Lg8XlQuTRo3C5XGhqakJtbS1qa2sRHh6Om266CcuXL8eUKVMwZMgQztFzDQpozGfVqlVIS0uD0WiE0WiE1WrFli1bvO3t7e3Izc1FXFwcoqKikJOTc8G8L9XV1cjOzkZERAQSEhKwdOlSdP1ohjsKPi67/dxDAM/TaDQI0WqRHBmJmddfj9cmTsRrGRlYnJqKjMGDYYyLQ4tOh5qaGjgcDpjNZuTk5OBvf/sbioqKsGHDBsydOxeJiYnQarUMnmtQQHs+1113HVasWIGRI0dCRPD+++/j/vvvx8GDB3HTTTfhiSeewCeffIJ169bBZDJh4cKFePDBB/Hvf/8bwLmLw7Kzs2GxWLBnzx7U1dVh1qxZ0Ol0+NOf/tQnH5D6noigadcudDU3X9DWHRrhoaEYYTRihNEIaDRIeuQRJNx//yVfk2Fz7dPIVT5fJDY2FitXrsRDDz2EwYMHY82aNXjooYcAnHu65Y033gibzYaJEydiy5YtuPfee1FbW+t93O5bb72F3/3udzh16tQlBxJdLpfPExOcTieSkpLgcDhg9GOScepbHU1N+GrZMr/n7QmJjsYNf/oTwocN6+PKqL85nU6YTCa/vps9PtXudrtRUFCAtrY2WK1WlJWVobOzE5mZmd4+o0ePRnJyMmw2GwDAZrNh7NixPs/5zsrKgtPpRGVl5SXfKz8/HyaTybskJSX1tGzqA21HjgQ0YZhpwgSE8Xf4kxdw+FRUVCAqKgoGgwHz589HYWEhUlNTYbfbodfrERMT49PfbDbDfv4P0263+wRPd3t326Xk5eXB4XB4l5ofPIKF1PK4XDh9/mmhfgkJwaDbbgN4WPWTF/DZrhtuuAHl5eVwOBxYv349Zs+ejZKSkr6ozctgMPAy+gGq/bvvApo6IyIlBdHjxnFMhwIPH71ejxHnbwJMT0/Hvn378Morr+Dhhx9GR0cHmpubffZ+6uvrYbFYAJybhqH0B0+t7G7vbqPgIh4PmkpKLnge1yVptYjPyoKW/yMh9MLtFR6PBy6XC+np6dDpdCguLva2VVVVobq6Gtbz8/JarVZUVFSgoaHB26eoqAhGoxGpqalXWwr1s87mZjTt2uV3f53JBNOtt3KvhwAEuOeTl5eHKVOmIDk5GS0tLVizZg127tyJbdu2wWQyYe7cuViyZAliY2NhNBrx+OOPw2q1YuLEiQCAyZMnIzU1FTNnzsQLL7wAu92OZ555Brm5uTysCjIigpb//AedDoff25gmToTOZOrDqiiYBBQ+DQ0NmDVrFurq6mAymZCWloZt27bhl7/8JQDg5ZdfhlarRU5ODlwuF7KysvDmm296tw8JCcGmTZuwYMECWK1WREZGYvbs2fjjH//Yu5+K+pzH5cLpoiLgMhPI/5A2LAzxkyZBExLSx5VRsLjq63xUCORaAuobbUeP4qtnnoHn7Fm/+kelpmLEc88hJDy8jysjlfrlOh/66RK3G407dvgdPNBqETd5MrScZ5l+gOFDAes4dQrfn79lxh+hJhOMaWkcaCYfDB8KiIjAUVaGru+/93ub2P/6L+ji4vqwKgpGDB8KiMflQuOOHX7314SEIPa//5t7PXQBhg/5TUTQ+uWXOPP1135vEzVmDMJTUvquKApaDB/ynwi+37XL79Pr0GoxeMoUaDntKV0Ew4f81n7yJJr37fO7f9jQoYgeN64PK6JgxvAhv4gInOXlcLe0+L1NjNWKkIiIPqyKghnDh/zibmk59whkP4XGxCDurrs40EyXxPAhv7QdO4azJ0743T9y5EgYOFMB
XQbDh67I09mJxuJiwOPxq79Gr0d8VhbAZ6jTZfCvg66ovaYGjv37/e6vj49H9JgxPOSiy2L40GWJCJpKSvy/j0ujQVxmJrS8gZSugOFDl+VuaUHzj2afvJyQ8HDE/uIX3OuhK2L40CWJCJpLS+Gqq/N7G+P48byPi/zC8KFLc7vhPHjQ/4Fmne7cHM06XR8XRtcChg9dkrjdPo9AvpLIG25A5KhRfVgRXUsYPtRrYjIyOGEY+Y3hQ5fl7+0RerMZg26/nQPN5DeGD12SvxcLag0GDP3NbzjQTAFh+NAlaTQaxN1112Ufb6wJDYX5gQcwiKfXKUABP7GUflpCIiKQNG8etBER+P6zz+BpbwdEAI0GoUYjEu67Dwn3389H4lDAGD50RbpBg5D86KOIz8xEa2UlulpaoI+LQ/S4cQgbOpTBQz3C8CG/aPV6RI0ejajRo1WXQtcIjvkQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKXFX4rFixAhqNBosXL/aua29vR25uLuLi4hAVFYWcnBzU19f7bFddXY3s7GxEREQgISEBS5cuRVdX19WUQkRBpsfhs2/fPrz99ttIS0vzWf/EE09g48aNWLduHUpKSlBbW4sHH3zQ2+52u5GdnY2Ojg7s2bMH77//Pt577z0sX76855+CiIKP9EBLS4uMHDlSioqK5I477pBFixaJiEhzc7PodDpZt26dt+/hw4cFgNhsNhER2bx5s2i1WrHb7d4+q1atEqPRKC6X66Lv197eLg6Hw7vU1NQIAHE4HD0pn4j6iMPh8Pu72aM9n9zcXGRnZyMzM9NnfVlZGTo7O33Wjx49GsnJybDZbAAAm82GsWPHwmw2e/tkZWXB6XSisrLyou+Xn58Pk8nkXZKSknpSNhENIAGHT0FBAQ4cOID8/PwL2ux2O/R6PWJiYnzWm81m2O12b58fBk93e3fbxeTl5cHhcHiXmpqaQMsmogEmoMnEampqsGjRIhQVFSGsHx+RYjAYYDAY+u39iKjvBbTnU1ZWhoaGBowfPx6hoaEIDQ1FSUkJXn31VYSGhsJsNqOjowPNzc0+29XX18NisQAALBbLBWe/un/u7kNE176AwmfSpEmoqKhAeXm5d5kwYQJmzJjh/W+dTofi4mLvNlVVVaiurobVagUAWK1WVFRUoKGhwdunqKgIRqMRqampvfSxiGigC+iwKzo6GmPGjPFZFxkZibi4OO/6uXPnYsmSJYiNjYXRaMTjjz8Oq9WKiRMnAgAmT56M1NRUzJw5Ey+88ALsdjueeeYZ5Obm8tCK6Cek1yeQf/nll6HVapGTkwOXy4WsrCy8+eab3vaQkBBs2rQJCxYsgNVqRWRkJGbPno0//vGPvV0KEQ1gGhER1UUEyul0wmQyweFwwGg0qi6HiM4L5LvJe7uISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUCFVdQE+ICADA6XQqroSIfqj7O9n9Hb2coAyfxsZGAEBSUpLiSojoYlpaWmAymS7bJyjDJzY2FgBQXV19xQ840DidTiQlJaGmpgZGo1F1OX5j3f0rWOsWEbS0tCAxMfGKfYMyfLTac0NVJpMpqH4xP2Q0GoOydtbdv4Kxbn93CDjgTERKMHyISImgDB+DwYDnnnsOBoNBdSkBC9baWXf/Cta6A6ERf86JERH1sqDc8yGi4MfwISIlGD5EpATDh4iUYPgQkRJBGT5vvPEGUlJSEBYWhoyMDJSWliqtZ9euXbjvvvuQmJgIjUaDDRs2+LSLCJYv
X44hQ4YgPDwcmZmZOHr0qE+fpqYmzJgxA0ajETExMZg7dy5aW1v7tO78/HzccsstiI6ORkJCAqZOnYqqqiqfPu3t7cjNzUVcXByioqKQk5OD+vp6nz7V1dXIzs5GREQEEhISsHTpUnR1dfVZ3atWrUJaWpr36l+r1YotW7YM6JovZsWKFdBoNFi8eHHQ1d4rJMgUFBSIXq+Xd999VyorK2XevHkSExMj9fX1ymravHmzPP300/Lhhx8KACksLPRpX7FihZhMJtmwYYP85z//kV//+tcyfPhwOXv2rLfPPffcI+PGjZPPP/9cPvvsMxkxYoRMnz69T+vOysqS1atXy6FDh6S8vFx+9atfSXJysrS2tnr7zJ8/X5KSkqS4uFj2798vEydOlNtuu83b3tXVJWPGjJHMzEw5ePCgbN68WeLj4yUvL6/P6v7444/lk08+ka+++kqqqqrk97//veh0Ojl06NCArfnHSktLJSUlRdLS0mTRokXe9cFQe28JuvC59dZbJTc31/uz2+2WxMREyc/PV1jV//lx+Hg8HrFYLLJy5UrvuubmZjEYDLJ27VoREfnyyy8FgOzbt8/bZ8uWLaLRaOTkyZP9VntDQ4MAkJKSEm+dOp1O1q1b5+1z+PBhASA2m01EzgWvVqsVu93u7bNq1SoxGo3icrn6rfZBgwbJ3//+96CouaWlRUaOHClFRUVyxx13eMMnGGrvTUF12NXR0YGysjJkZmZ612m1WmRmZsJmsyms7NKOHz8Ou93uU7PJZEJGRoa3ZpvNhpiYGEyYMMHbJzMzE1qtFnv37u23Wh0OB4D/mzWgrKwMnZ2dPrWPHj0aycnJPrWPHTsWZrPZ2ycrKwtOpxOVlZV9XrPb7UZBQQHa2tpgtVqDoubc3FxkZ2f71AgEx793bwqqu9pPnz4Nt9vt8w8PAGazGUeOHFFU1eXZ7XYAuGjN3W12ux0JCQk+7aGhoYiNjfX26WsejweLFy/G7bffjjFjxnjr0uv1iImJuWztF/ts3W19paKiAlarFe3t7YiKikJhYSFSU1NRXl4+YGsGgIKCAhw4cAD79u27oG0g/3v3haAKH+o7ubm5OHToEHbv3q26FL/ccMMNKC8vh8PhwPr16zF79myUlJSoLuuyampqsGjRIhQVFSEsLEx1OcoF1WFXfHw8QkJCLhj9r6+vh8ViUVTV5XXXdbmaLRYLGhoafNq7urrQ1NTUL59r4cKF2LRpE3bs2IHrrrvOu95isaCjowPNzc2Xrf1in627ra/o9XqMGDEC6enpyM/Px7hx4/DKK68M6JrLysrQ0NCA8ePHIzQ0FKGhoSgpKcGrr76K0NBQmM3mAVt7Xwiq8NHr9UhPT0dxcbF3ncfjQXFxMaxWq8LKLm348OGwWCw+NTudTuzdu9dbs9VqRXNzM8rKyrx9tm/fDo/Hg4yMjD6rTUSwcOFCFBYWYvv27Rg+fLhPe3p6OnQ6nU/tVVVVqK6u9qm9oqLCJzyLiopgNBqRmpraZ7X/mMfjgcvlGtA1T5o0CRUVFSgvL/cuEyZMwIwZM7z/PVBr7xOqR7wDVVBQIAaDQd577z358ssv5dFHH5WYmBif0f/+1tLSIgcPHpSDBw8KAHnppZfk4MGDcuLECRE5d6o9JiZGPvroI/niiy/k/vvvv+ip9ptvvln27t0ru3fvlpEjR/b5qfYFCxaIyWSSnTt3Sl1dnXc5c+aMt8/8+fMlOTlZtm/fLvv37xer1SpWq9Xb3n3qd/LkyVJeXi5bt26VwYMH9+mp32XLlklJSYkcP35cvvjiC1m2bJloNBr517/+NWBrvpQfnu0KttqvVtCFj4jIa6+9JsnJyaLX6+XWW2+Vzz//XGk9O3bsEAAXLLNnzxaRc6fbn332WTGbzWIwGGTSpElSVVXl8xqNjY0yffp0iYqKEqPRKHPmzJGWlpY+rftiNQOQ1atXe/ucPXtWHnvsMRk0aJBERETIAw88IHV1dT6v8+2338qU
KVMkPDxc4uPj5cknn5TOzs4+q/uRRx6RYcOGiV6vl8GDB8ukSZO8wTNQa76UH4dPMNV+tTifDxEpEVRjPkR07WD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlLi/wOsWLfx3MdiAQAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "-3.1269710318178032"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Play one episode with rendering; the cell shows its total reward\n",
    "play(True)[-1]"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "第7章-DQN算法.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
